Use Dataset created from 02_CLS_Data_Summary_2022_0914_Data_Analysis File

Loading Data

Load Google Sheet

Final_CLS_2022_Study_List_Non_Search_model_file <- read_sheet(
  "https://docs.google.com/spreadsheets/d/1N48rTeq7md0v8w8pG_8XIiuapPHQAeO5WoWIB3eaceI/edit#gid=1449351377",
  sheet = "FinalDataset_2022_Update"
) %>%
  mutate(
    Significant_Spend =
      as.numeric(
        case_when(
          probability_of_lift >= 0.9 ~ 1,
          TRUE ~ 0
        )
      ),
    country = case_when(
      country == "NA" ~ "US",
      TRUE ~ country
    ),
    region_v2 = case_when(
      country == "US" ~ "NA",
      country == "CA" ~ "NA",
      country == "US + CA" ~ "NA",
      TRUE ~ region
    )
  ) %>%
  filter(channel != "Search") %>%
  # filter out studies without reported lifts
  filter(exposed != -1) %>%
  # filter out google pay study
  filter(study_id != "149142217") %>%
  # filter out very negative absolute lifts
  filter(absolute_lift > -1000) %>%
  mutate(
    pa = case_when(
      pa == "Google Ads" ~ "SMB", # Step 1
      pa == "YouTube" & conversion != "Type 256522942 ([MCC] YouTube TV - Web - Trial Start)" ~ "YTMP", # Step 2
      pa == "YouTube Premium" ~ "YTMP", # Step 2
      conversion == "Type 256522942 ([MCC] YouTube TV - Web - Trial Start)" ~ "YouTube TV", # Step 2
      pa == "Cloud" & conversion != "Type 14257803 (Enterprise - Apps - Signup Confirm - Unique)" ~ "Cloud Workspace", # Step 3
      pa == "Cloud" & conversion == "Type 14257803 (Enterprise - Apps - Signup Confirm - Unique)" ~ "Cloud GCP", # Step 3
      pa == "Project Fi" ~ "Google Fi", # Step 4
      pa == "Google Chrome" ~ "Chrome",
      TRUE ~ pa
    )
  ) %>%
  mutate(
    parsed_type = parse_number(conversion),
    grouped_conversion = case_when(
      conversion %in% c("Chromebook Microsite Referral Clicks Q4 2015", "Type 251422729 (Chromebooks Microsite Referral Clicks (Q4 2017))") ~ "Chromebook Referrals",
      conversion %in% c("Desktop Downloads", "Type 11541547 (Desktop Download)") ~
        "Desktop Downloads",
      pa == "Pixel" ~ "Mobile Conversions",
      pa == "DSM" ~ "Non-Mobile Device Conversions",
      conversion == "Type 302982954 (Lena - P Lead)" ~ "Lena P Lead",
      conversion == "Type 288347008 (LENA - B Lead)" ~ "Lena B Lead",
      conversion == "Type 288697653 (LENA - Q Lead)" ~ "Lena Q Lead",
      parsed_type %in% c(181283993, 855508686) ~ "Workspace Free Trial Start",
      parsed_type == 330755641 ~ "Microsite Conversions",
      parsed_type == 14257803 ~ "Enterprise Signups",
      parsed_type == 289680712 ~ "Google(iOs) First Open",
      parsed_type == 256522942 ~ "YouTube TV - Web - Trial Start",
      parsed_type %in% c(452391534, 221497833, 277150074) ~ "Trial Signups Complete",
      TRUE ~ conversion
    ),
    pa = case_when(
      conversion == "Type 288697653 (LENA - Q Lead)" ~ "SMB-QLead",
      TRUE ~ pa
    )
  ) %>%
  filter(absolute_lift > 0)


# all.equal(Final_CLS_2022_Study_List_Non_Search_model_file,Final_CLS_2022_Study_List_Non_Search_v3)

Create All Response Curves only normal powers

Folder for all Output and scripts

file.sources <- list.files(path = "RScripts/", pattern = "*.R", full.names = TRUE)
sapply(file.sources, source, .GlobalEnv)
        RScripts/best_ind_function.R RScripts/export_rplots_function.R RScripts/export_rplots_function2.R
value   ?                            ?                                 ?                                 
visible FALSE                        FALSE                             FALSE                             
        RScripts/graphing_function.R RScripts/graphing_function_elasticnet.R RScripts/graphing_function_rlm.R
value   ?                            ?                                       ?                               
visible FALSE                        FALSE                                   FALSE                           
        RScripts/graphing_function2.R RScripts/graphing_function3.R RScripts/graphing_function4.R
value   ?                             ?                             ?                            
visible FALSE                         FALSE                         FALSE                        
        RScripts/graphing_function4_w_anom.R RScripts/model_wrapper_function.R RScripts/model_wrapper_function2.R
value   ?                                    ?                                 ?                                 
visible FALSE                                FALSE                             FALSE                             
        RScripts/named_group_split.R RScripts/names_function.R RScripts/ridge_lasso_function.R RScripts/ridge_lasso_function2.R
value   ?                            ?                         ?                               ?                               
visible FALSE                        FALSE                     FALSE                           FALSE                           
        RScripts/ridge_lasso_function4.R RScripts/rlm_function.R RScripts/rlm_function2.R
value   ?                                ?                       ?                       
visible FALSE                            FALSE                   FALSE                   

Check parameters


### powers to try
powers <- seq(0.1, 0.9, by = 0.01)
powers2 <- 1

### Powers to Try
#powers <- seq(0.1, 0.9, by = 0.01)
#powers2 <-seq(1.5,3, by = 0.25)


### Lambda parameters
parameters <- c(
  #  seq(0.1, 2, by =0.1) ,  seq(2, 5, 0.5) ,
  seq(5, 29, 1)
  ,seq(30, 102, 4)
  ,seq(110, 1000, 15)
  ,seq(1000, 10020, 500)
)

### elasticnet parameters
alpha_parameters <- c(seq(0, 1, 0.25))

# For Testing Purposes
#alpha_parameters <- c(seq(1, 1, 1))

Testing Different Model Types

Chrome

Data Readin


start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "Chrome") %>%
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre %>%
  select(
    region_v2, country, channel, tactic,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_chrome <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1153)

iso_chrome$fit(df_test)

scores_train <- df_test %>%
  iso_chrome$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>% 
  filter(average_depth > 3)


Final_CLS_2022_Study_List_Non_Search_model_file_chrome <-
  Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre2 %>%
  named_group_split(tactic)

Run Model



fits_non_search_chrome <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_chrome,poly_ind = 0)

best_ind_non_search_chrome <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_chrome), best_ind_function,df = fits_non_search_chrome,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_chrome) 

coef_non_search_chrome <- best_ind_non_search_chrome %>% bind_rows #make a matrix of all coefs

best_fit_non_search_chrome <- best_ind_non_search_chrome %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre, tactic))  

Create Graph Object

graph_list_chrome <- lapply(1:length(best_fit_non_search_chrome), graphing_function4, df1 = best_fit_non_search_chrome, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_chrome)
end_time <- Sys.time()

time_chrome = end_time - start_time

time_chrome

Cloud

Data Readin

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa %in% c("Cloud GCP", "Cloud Workspace")) %>%
  mutate(
    pa = "Cloud",
    pa2 = "Cloud - All Channel"
  ) %>%
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift, parsed_type
  )

iso_cloud <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1153)

iso_cloud$fit(df_test)

scores_train <- df_test %>%
  iso_cloud$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre %>%
  left_join(scores_train, by = c("id2" = "id"))

Final_CLS_2022_Study_List_Non_Search_model_file_cloud <-
  Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre2 %>%
  named_group_split(pa2)

Run Model

fits_non_search_cloud <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_cloud,poly_ind = 0)

best_ind_non_search_cloud <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_cloud), best_ind_function,df = fits_non_search_cloud,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_cloud) 

coef_non_search_cloud <- best_ind_non_search_cloud %>% bind_rows #make a matrix of all coefs

best_fit_non_search_cloud <- best_ind_non_search_cloud %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre, pa2))  

Create Graph Object

graph_list_cloud <- lapply(1:length(best_fit_non_search_cloud), graphing_function4, df1 = best_fit_non_search_cloud, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_cloud)
end_time <- Sys.time()

time_cloud = end_time - start_time

YouTube

Data Readin


start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa %in% c("YouTube TV", "YTMP")) %>%
  mutate(
    pa = "YouTube",
    pa2 = "YouTube"
  ) %>%
  #  filter(absolute_lift < 5000) %>%
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift, parsed_type
  )

iso_yt <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1153)

iso_yt$fit(df_test)

scores_train <- df_test %>%
  iso_yt$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 3.89)

Final_CLS_2022_Study_List_Non_Search_model_file_youtube <-
  Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre2 %>%
  named_group_split(region_v2)

Run Model

fits_non_search_youtube <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_youtube,poly_ind = 0)

best_ind_non_search_youtube <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_youtube), best_ind_function,df = fits_non_search_youtube,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_youtube) 

coef_non_search_youtube <- best_ind_non_search_youtube %>% bind_rows #make a matrix of all coefs

best_fit_non_search_youtube <- best_ind_non_search_youtube %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre, pa2))  

Create Graph Object

graph_list_youtube <- lapply(1:length(best_fit_non_search_youtube), graphing_function4, df1 = best_fit_non_search_youtube, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_youtube)
end_time <- Sys.time()

time_youtube = end_time - start_time

DSM

Data Readin


start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "DSM") %>%
  filter(region_v2 != "APAC") %>%
  # filter(absolute_lift < 1000) # %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_dsm <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_dsm$fit(df_test)

scores_train <- df_test %>%
  iso_dsm$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 5)

Final_CLS_2022_Study_List_Non_Search_model_file_dsm <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre2 %>%
  named_group_split(region_v2, channel)

Run Model

fits_non_search_dsm <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_dsm,poly_ind = 0)

best_ind_non_search_dsm <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_dsm), best_ind_function,df = fits_non_search_dsm,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_dsm)

coef_non_search_dsm <- best_ind_non_search_dsm %>% bind_rows #make a matrix of all coefs

best_fit_non_search_dsm <- best_ind_non_search_dsm %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre, region_v2,channel))  

Create Graph Object

graph_list_dsm <- lapply(1:length(best_fit_non_search_dsm), graphing_function4, df1 = best_fit_non_search_dsm, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_dsm)
end_time <- Sys.time()

time_dsm = end_time - start_time

Pixel

Data Readin


start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "Pixel") %>%
  mutate(
    pa2 = "Pixel - All Channel"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_pixel <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_pixel$fit(df_test)

scores_train <- df_test %>%
  iso_pixel$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 3.1)

Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre2


Final_CLS_2022_Study_List_Non_Search_model_file_pixel <-
  Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre2 %>%
  named_group_split(pa2)

Run Model

fits_non_search_pixel <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_pixel,poly_ind = 0)

best_ind_non_search_pixel <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_pixel), best_ind_function,df = fits_non_search_pixel,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_pixel) 

coef_non_search_pixel <- best_ind_non_search_pixel %>% bind_rows #make a matrix of all coefs

best_fit_non_search_pixel <- best_ind_non_search_pixel %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre, pa2))  

Create Graph Object

graph_list_pixel <- lapply(1:length(best_fit_non_search_pixel), graphing_function4, df1 = best_fit_non_search_pixel, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_pixel)
end_time <- Sys.time()

time_pixel = end_time - start_time

Fi

Data Readin


start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "Google Fi") %>%
  mutate(
    pa2 = "Fi - All Channel"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_fi <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_fi$fit(df_test)

scores_train <- df_test %>%
  iso_fi$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 4.75)

Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre2


Final_CLS_2022_Study_List_Non_Search_model_file_fi <-
  Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre2 %>%
  named_group_split(channel)

Run Model

fits_non_search_fi <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_fi,poly_ind = 0)

best_ind_non_search_fi <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_fi), best_ind_function,df = fits_non_search_fi,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_fi) 

coef_non_search_fi <- best_ind_non_search_fi %>% bind_rows #make a matrix of all coefs

best_fit_non_search_fi <- best_ind_non_search_fi %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre, pa2))  

Create Graph Object

graph_list_fi <- lapply(1:length(best_fit_non_search_fi), graphing_function4, df1 = best_fit_non_search_fi, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_fi)
end_time <- Sys.time()

time_fi = end_time - start_time

SMB - QLeads

Data Readin


start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(grouped_conversion == 'Lena Q Lead') %>%
  mutate(
    pa2 = "SMB - Q-Lead"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_smbq <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_smbq$fit(df_test)

scores_train <- df_test %>%
  iso_smbq$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>% 
  filter(average_depth > 1)

Final_CLS_2022_Study_List_Non_Search_model_file_smbq <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre2 %>%
  named_group_split(pa2)

Run Model

fits_non_search_smbq <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_smbq,poly_ind = 0)

best_ind_non_search_smbq <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_smbq), best_ind_function,df = fits_non_search_smbq,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbq) 

coef_non_search_smbq <- best_ind_non_search_smbq %>% bind_rows #make a matrix of all coefs

best_fit_non_search_smbq <- best_ind_non_search_smbq %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre, pa2))  

Create Graph Object

graph_list_smbq <- lapply(1:length(best_fit_non_search_smbq), graphing_function4, df1 = best_fit_non_search_smbq, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbq)
end_time <- Sys.time()

time_smbq = end_time - start_time

SMB - BLeads

Data Readin


start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "SMB" & grouped_conversion == 'Lena B Lead') %>%
  mutate(
    pa2 = "SMB - B-Lead"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_smbb <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_smbb$fit(df_test)

scores_train <- df_test %>%
  iso_smbb$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>% 
  filter(average_depth > 4)

Final_CLS_2022_Study_List_Non_Search_model_file_smbb <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre2 %>%
  named_group_split(channel)

Run Model

fits_non_search_smbb <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_smbb,poly_ind = 0)

best_ind_non_search_smbb <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_smbb), best_ind_function,df = fits_non_search_smbb,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbb) 

coef_non_search_smbb <- best_ind_non_search_smbb %>% bind_rows #make a matrix of all coefs

best_fit_non_search_smbb <- best_ind_non_search_smbb %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre, channel))  

Create Graph Object

graph_list_smbb <- lapply(1:length(best_fit_non_search_smbb), graphing_function4, df1 = best_fit_non_search_smbb, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbb)
end_time <- Sys.time()

time_smbb = end_time - start_time

Export all graph lists

graph_names <- mget(ls(pat = 'graph_list_'))
   
df_names <- mget(setdiff(ls(pattern = 'Final_CLS_2022_Study_List_Non_Search_model_file_'), ls(pattern = "pre")))

#rm(Final_CLS_2022_Study_List_Non_Search_model_file_Chrome,Final_CLS_2022_Study_List_Non_Search_model_file_Cloud,Final_CLS_2022_Study_List_Non_Search_model_file_YouTube)

#lapply(1:length(graph_names),
#      function(j) {
#lapply(1:length(df_names[[j]]),export_rplots_function2,starting_name = "Non_Search_",folder_name = folder_name,df_list = #df_names[[j]],graphing_list = graph_names[j][[1]])
#      }
#       )

Grid of all Response Curves

Sub Plot Documentation

Coef Matrix

Graphs with Anomaly Scores

graph_list.fi <- lapply(1:length(best_fit_non_search_fi), graphing_function4_w_anom, df1 = best_fit_non_search_fi, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_fi)

### Add GG Text Repel
ggplotly(graph_list.fi[[3]])

Create all Response Curves - Ridge/Lasso


start_time <- Sys.time()

fits.non.search.RIDGE_LASSO <- lapply(
  1:length(df_names),
  function(i) {
    model_wrapper_function(df = df_names[i][[1]],poly_ind = 0)
  }
)

Create all Response Curves - RLM



start_time <- Sys.time()

fits.non.search.RLM <- lapply(
  1:length(df_names),
  function(i) {
    model_wrapper_function2(df = df_names[i][[1]])
  }
)

end_time <- Sys.time()

combined_rlm_time <- start_time - end_time

best.ind.non.search.RLM <- lapply(
  1:length(df_names),
  function(i) {   
  lapply(1:length(df_names[i][[1]]), best_ind_function,df = fits.non.search.RLM[i][[1]],
         df2 = df_names[i][[1]])
  }
)

coef.non.search.RLM <- lapply(
  1:length(df_names),
  function (i){
  best.ind.non.search.RLM[i][[1]] %>% bind_rows
  }
) %>%
  bind_rows() %>% 
  as.data.frame() %>% 
  mutate(
    cost_p2 = 0,
    lambda = 0,
    alpha = 0,
    powers2 = 0
  ) %>% 
  select(one_of(colnames(coef.2_matrix)))


best.fit.non.search.RLM <- lapply(1:length(df_names),
      function(j) {
lapply(1:length(best.ind.non.search.RLM[[j]]),
      function(i){
        best.ind.non.search.RLM[j][[1]][i] %>% 
        set_names(nm = best.ind.non.search.RLM[j][[1]][[i]]["model"])
      } 
)
      }
       )


  

-combined_ridge_time+combined_rlm_time
Time difference of 32.768 mins

graph.list.rlm <- lapply(1:length(df_names),
      function(i){
      lapply(1:length(best.fit.non.search.RLM[i][[1]]), graphing_function_rlm, df1= best.fit.non.search.RLM[i],df2 = df_names[i])
      } 
)


graph.list.RIDGE_LASSO <- lapply(1:length(df_names),
      function(i){
      lapply(1:length(best.fit.non.search.RIDGE_LASSO[i][[1]]), graphing_function_elasticnet, df1= best.fit.non.search.RIDGE_LASSO[i],df2 = df_names[i])
      } 
)

Export all Plots


folder_name1 <- paste0("Output/", "outputfiles_", Sys.Date(), "_", "RLM", "/")
dir.create(folder_name1) # it will throw a warning if folder exists
Warning in dir.create(folder_name1) :
  'Output\outputfiles_2022-11-08_RLM' already exists
lapply(1:length(df_names),
      function(j) {
lapply(1:length(df_names[[j]]),export_rplots_function2,starting_name = "Non_Search_",folder_name = folder_name1,df_list = df_names[[j]],graphing_list = graph.list.rlm[j][[1]])
      }
       )
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
[[1]]
[[1]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Chrome_All_Channel.png"

[[1]][[2]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Chrome_non-REMK.png"

[[1]][[3]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Chrome_REMK.png"


[[2]]
[[2]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Cloud_Cloud_-_All_Channel.png"


[[3]]
[[3]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_DSM_EMEA__DISCOVERY.png"

[[3]][[2]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_DSM_EMEA__DISPLAY.png"

[[3]][[3]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_DSM_EMEA__YOUTUBE.png"

[[3]][[4]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_DSM_NA__DISCOVERY.png"

[[3]][[5]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_DSM_NA__DISPLAY.png"

[[3]][[6]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_DSM_NA__YOUTUBE.png"


[[4]]
[[4]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Google_Fi_DISCOVERY.png"

[[4]][[2]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Google_Fi_DISPLAY.png"

[[4]][[3]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Google_Fi_YOUTUBE.png"


[[5]]
[[5]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_Pixel_Pixel_-_All_Channel.png"


[[6]]
[[6]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_SMB_DISCOVERY.png"

[[6]][[2]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_SMB_DISPLAY.png"

[[6]][[3]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_SMB_YOUTUBE.png"


[[7]]
[[7]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_SMB-QLead_SMB_-_Q-Lead.png"


[[8]]
[[8]][[1]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_YouTube_APAC.png"

[[8]][[2]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_YouTube_EMEA.png"

[[8]][[3]]
[1] "Output/outputfiles_2022-11-08_RLM/Non_Search_YouTube_NA.png"

folder_name2 <- paste0("Output/", "outputfiles_", Sys.Date(), "_", "ElasticNet", "/")
dir.create(folder_name2) # it will throw a warning if folder exists
Warning in dir.create(folder_name2) :
  'Output\outputfiles_2022-11-08_ElasticNet' already exists
lapply(1:length(df_names),
      function(j) {
lapply(1:length(df_names[[j]]),export_rplots_function2,starting_name = "Non_Search_",folder_name = folder_name2,df_list = df_names[[j]],graphing_list = graph.list.RIDGE_LASSO[j][[1]])
      }
       )
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
Saving 18.8 x 12.5 in image
[[1]]
[[1]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Chrome_All_Channel.png"

[[1]][[2]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Chrome_non-REMK.png"

[[1]][[3]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Chrome_REMK.png"


[[2]]
[[2]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Cloud_Cloud_-_All_Channel.png"


[[3]]
[[3]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_DSM_EMEA__DISCOVERY.png"

[[3]][[2]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_DSM_EMEA__DISPLAY.png"

[[3]][[3]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_DSM_EMEA__YOUTUBE.png"

[[3]][[4]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_DSM_NA__DISCOVERY.png"

[[3]][[5]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_DSM_NA__DISPLAY.png"

[[3]][[6]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_DSM_NA__YOUTUBE.png"


[[4]]
[[4]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Google_Fi_DISCOVERY.png"

[[4]][[2]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Google_Fi_DISPLAY.png"

[[4]][[3]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Google_Fi_YOUTUBE.png"


[[5]]
[[5]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_Pixel_Pixel_-_All_Channel.png"


[[6]]
[[6]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_SMB_DISCOVERY.png"

[[6]][[2]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_SMB_DISPLAY.png"

[[6]][[3]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_SMB_YOUTUBE.png"


[[7]]
[[7]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_SMB-QLead_SMB_-_Q-Lead.png"


[[8]]
[[8]][[1]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_YouTube_APAC.png"

[[8]][[2]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_YouTube_EMEA.png"

[[8]][[3]]
[1] "Output/outputfiles_2022-11-08_ElasticNet/Non_Search_YouTube_NA.png"

Show Graphs

lapply(1:length(df_names),
function(j){
  subplot(graph.list.rlm[j][[1]], nrows = length(graph.list.rlm[j][[1]]))
}
)
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

[[7]]

[[8]]
lapply(1:length(df_names),
function(i){
#p1 = graph_names[i][[1]]
do.call(grid.arrange,graph.list.rlm[i][[1]])
#return(grid.arrange(grobs = p1))
}
)
[[1]]
TableGrob (3 x 1) "arrange": 3 grobs

[[2]]
TableGrob (1 x 1) "arrange": 1 grobs

[[3]]
TableGrob (3 x 2) "arrange": 6 grobs

[[4]]
TableGrob (3 x 1) "arrange": 3 grobs

[[5]]
TableGrob (1 x 1) "arrange": 1 grobs

[[6]]
TableGrob (3 x 1) "arrange": 3 grobs

[[7]]
TableGrob (1 x 1) "arrange": 1 grobs

[[8]]
TableGrob (3 x 1) "arrange": 3 grobs
NA

lapply(1:length(df_names),
function(j){
  subplot(graph.list.RIDGE_LASSO[j][[1]], nrows = length(graph.list.RIDGE_LASSO[j][[1]]))
}
)
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

[[7]]

[[8]]
lapply(1:length(df_names),
function(i){
#p1 = graph_names[i][[1]]
do.call(grid.arrange,graph.list.RIDGE_LASSO[i][[1]])
#return(grid.arrange(grobs = p1))
}
)
[[1]]
TableGrob (3 x 1) "arrange": 3 grobs

[[2]]
TableGrob (1 x 1) "arrange": 1 grobs

[[3]]
TableGrob (3 x 2) "arrange": 6 grobs

[[4]]
TableGrob (3 x 1) "arrange": 3 grobs

[[5]]
TableGrob (1 x 1) "arrange": 1 grobs

[[6]]
TableGrob (3 x 1) "arrange": 3 grobs

[[7]]
TableGrob (1 x 1) "arrange": 1 grobs

[[8]]
TableGrob (3 x 1) "arrange": 3 grobs
NA

Testing Metafor Package


p_load(lmer4)
Installing package into ‘C:/Users/Admin/Documents/R/win-library/4.1’
(as ‘lib’ is unspecified)
Warning: package ‘lmer4’ is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1:
  cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1/PACKAGES'
Warning in p_install(package, character.only = TRUE, ...) : 
Warning in library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE,  :
  there is no package called ‘lmer4’
Warning in p_load(lmer4) : Failed to install/load:
lmer4
p_load(metaforest)
Installing package into ‘C:/Users/Admin/Documents/R/win-library/4.1’
(as ‘lib’ is unspecified)
also installing the dependencies ‘metadat’, ‘mathjaxr’, ‘metafor’

Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1:
  cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1/PACKAGES'

  There is a binary version available but the source version is later:
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.1/mathjaxr_1.6-0.zip'
Content type 'application/zip' length 970577 bytes (947 KB)
downloaded 947 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.1/metafor_3.8-1.zip'
Content type 'application/zip' length 4483503 bytes (4.3 MB)
downloaded 4.3 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.1/metaforest_0.1.3.zip'
Content type 'application/zip' length 217909 bytes (212 KB)
downloaded 212 KB
package ‘mathjaxr’ successfully unpacked and MD5 sums checked
package ‘metafor’ successfully unpacked and MD5 sums checked
package ‘metaforest’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\Admin\AppData\Local\Temp\RtmpGGjNkA\downloaded_packages
installing the source package ‘metadat’

trying URL 'https://cran.rstudio.com/src/contrib/metadat_1.2-0.tar.gz'
Content type 'application/x-gzip' length 964808 bytes (942 KB)
downloaded 942 KB
* installing *source* package 'metadat' ...
** package 'metadat' successfully unpacked and MD5 sums checked
** using staged installation
** R
** data
*** moving datasets to lazyload DB
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
  converting help for package 'metadat'
    finding HTML links ... done
    dat.aloe2013                            html  
    dat.anand1999                           html  
    dat.assink2016                          html  
    dat.axfors2021                          html  
    dat.bakdash2021                         html  
    dat.baker2009                           html  
    dat.bangertdrowns2004                   html  
    dat.baskerville2012                     html  
    dat.bcg                                 html  
    dat.begg1989                            html  
    dat.berkey1998                          html  
    dat.besson2016                          html  
    dat.bonett2010                          html  
    dat.bornmann2007                        html  
    dat.bourassa1996                        html  
    dat.cannon2006                          html  
    dat.cohen1981                           html  
    dat.colditz1994                         html  
    dat.collins1985a                        html  
    dat.collins1985b                        html  
    dat.craft2003                           html  
    dat.crede2010                           html  
    dat.curtis1998                          html  
    dat.dagostino1998                       html  
    dat.damico2009                          html  
    dat.debruin2009                         html  
    dat.dogliotti2014                       html  
    dat.dong2013                            html  
    dat.dorn2007                            html  
    dat.egger2001                           html  
    dat.fine1993                            html  
    dat.franchini2012                       html  
    dat.frank2008                           html  
    dat.gibson2002                          html  
    dat.graves2010                          html  
    dat.gurusamy2011                        html  
    dat.hackshaw1998                        html  
    dat.hahn2001                            html  
    dat.hannum2020                          html  
    dat.hart1999                            html  
    dat.hartmannboyce2018                   html  
    dat.hasselblad1998                      html  
    dat.hine1989                            html  
    dat.ishak2007                           html  
    dat.kalaian1996                         html  
    dat.kearon1998                          html  
    dat.knapp2017                           html  
    dat.konstantopoulos2011                 html  
    dat.landenberger2005                    html  
    dat.laopaiboon2015                      html  
    dat.lau1992                             html  
    dat.lee2004                             html  
    dat.lehmann2018                         html  
    dat.li2007                              html  
    dat.lim2014                             html  
    dat.linde2005                           html  
    dat.linde2015                           html  
    dat.linde2016                           html  
    dat.lopez2019                           html  
    dat.maire2019                           html  
    dat.mccurdy2020                         html  
    dat.mcdaniel1994                        html  
    dat.michael2013                         html  
    dat.molloy2014                          html  
    dat.moura2021                           html  
    dat.nakagawa2007                        html  
    dat.nielweise2007                       html  
    dat.nielweise2008                       html  
    dat.normand1999                         html  
    dat.obrien2003                          html  
    dat.pagliaro1992                        html  
    dat.pignon2000                          html  
    dat.pritz1997                           html  
    dat.raudenbush1985                      html  
    dat.riley2003                           html  
    dat.senn2013                            html  
    dat.stowe2010                           html  
    dat.tannersmith2016                     html  
    dat.vanhowe1999                         html  
    dat.viechtbauer2021                     html  
    dat.white2020                           html  
    dat.woods2010                           html  
    dat.yusuf1985                           html  
    datsearch                               html  
    metadat-package                         html  
    prep_dat                                html  
** building package indices
** testing if installed package can be loaded from temporary location
*** arch - i386
*** arch - x64
** testing if installed package can be loaded from final location
*** arch - i386
*** arch - x64
** testing if installed package keeps a record of temporary installation path
* DONE (metadat)

The downloaded source packages are in
    ‘C:\Users\Admin\AppData\Local\Temp\RtmpGGjNkA\downloaded_packages’

metaforest installed
Warning: package ‘metaforest’ was built under R version 4.1.3
Warning: package ‘metafor’ was built under R version 4.1.3

Testing on DSM Data

Load in Data



Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "DSM") %>%
  filter(region_v2 != "APAC") %>%
  # filter(absolute_lift < 1000) # %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_dsm <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_dsm$fit(df_test)

scores_train <- df_test %>%
  iso_dsm$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
 filter(average_depth > 5.1)


Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1 %>%
  named_group_split(region_v2)

Standardize Data

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1 %>% 
  mutate(
    
  )

Run a Fixed-Effects Model

Documentation



#df_smd 


i = 1

yi_DSM = Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['absolute_lift'] %>% unlist()
vi_DSM = Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['Significant_Spend']^1 %>% unlist()
split2 = factor(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['channel'] %>% unlist(),labels = unique(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['channel']) %>% unlist())


m_reg <- rma(yi = yi_DSM,     # The d-column of the df, which contains Cohen's d
         vi = vi_DSM   # The vi-column of the df, which contains the variances
         ,mods = ~split2-1 #to remove intercept between slopes
         )  
Warning: There are outcomes with non-positive sampling variances.
Warning: Cannot compute QE-test, I^2, or H^2 when there are non-positive sampling variances in the data.
       

mod1_test

Mixed-Effects Model (k = 26; tau^2 estimator: REML)

tau^2 (estimated amount of residual heterogeneity):     14491.7080 (SE = 25164.4568)
tau (square root of estimated tau^2 value):             120.3815
I^2 (residual heterogeneity / unaccounted variability): 16.12%
H^2 (unaccounted variability / sampling variability):   1.19
R^2 (amount of heterogeneity accounted for):            87.03%

Test for Residual Heterogeneity:
QE(df = 23) = 32.2909, p-val = 0.0942

Test of Moderators (coefficients 2:3):
QM(df = 2) = 25.4474, p-val < .0001

Model Results:

---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#rm(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1)#,mod1_test, i)

predict(mod1_test)
forest(mod1_test, slab = Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['study_name'] %>% unlist(), addcred = TRUE)

Additional Test


# Specify basic plot, mapping sex to the x-axis, effect size 'd' to the y-axis,
# and 'weights' to the weight parameter.

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]] %>% 
  ggplot()+
  aes(
    x = cost_spent_on_exposed_group,
    y = absolute_lift,
    size = 1/sqrt(cost_spent_on_exposed_group)
  ) +
  geom_point(shape = 1) + # Add scatter
  geom_abline(intercept = m_reg$b[1], slope = m_reg$b[3]) + # Add regression line
  theme_bw() + # Apply black and white theme
  theme(legend.position = "none") # Remove legend
---
title: "03_CLS_Spend_Response_Curves_No_Poly"
author: "Essence Global Advanced Analytics Team"
date: "`r Sys.Date()`"
output:
  html_notebook:
    toc: yes
    toc_float: yes
    number_sections: no
    theme: cerulean
    highlight: zenburn
    df_print: paged
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
options(knitr.table.format = "html")
options(digits = 5)
options(scipen = 100)
knitr::opts_chunk$set(tidy.opts = list(width.cutoff = 80), tidy = TRUE)
knitr::opts_chunk$set(fig.width = 15)
knitr::opts_chunk$set(fig.height = 10)
# install.packages("pacman")
library(pacman) # for quick load/install of packages
p_load(dplyr, readr, tidyverse, reticulate, lubridate, janitor, sqldf, googlesheets4)
p_load(skimr, splitstackshape, stringr, rqdatatable)
p_load(moments)
p_load(kableExtra)
p_load(ggplot2, plotly, echarts4r, ggpubr, forcats, scales, RColorBrewer,gridExtra)
p_load(ggthemes)
p_load(caret, recipes)
p_load(glmnet)
p_load(elasticnet)
p_load(Metrics)
p_load(fastDummies)
p_load(broom)
p_load(htmlwidgets)
p_load(solitude)
p_load(mlbench)
p_load(uwot)
```

# Use Dataset created from 02_CLS_Data_Summary_2022_0914_Data_Analysis File

## Loading Data

### Load Google Sheet

```{r}
Final_CLS_2022_Study_List_Non_Search_model_file <- read_sheet(
  "https://docs.google.com/spreadsheets/d/1N48rTeq7md0v8w8pG_8XIiuapPHQAeO5WoWIB3eaceI/edit#gid=1449351377",
  sheet = "FinalDataset_2022_Update"
) %>%
  mutate(
    Significant_Spend =
      as.numeric(
        case_when(
          probability_of_lift >= 0.9 ~ 1,
          TRUE ~ 0
        )
      ),
    country = case_when(
      country == "NA" ~ "US",
      TRUE ~ country
    ),
    region_v2 = case_when(
      country == "US" ~ "NA",
      country == "CA" ~ "NA",
      country == "US + CA" ~ "NA",
      TRUE ~ region
    )
  ) %>%
  filter(channel != "Search") %>%
  # filter out studies without reported lifts
  filter(exposed != -1) %>%
  # filter out google pay study
  filter(study_id != "149142217") %>%
  # filter out very negative absolute lifts
  filter(absolute_lift > -1000) %>%
  mutate(
    pa = case_when(
      pa == "Google Ads" ~ "SMB", # Step 1
      pa == "YouTube" & conversion != "Type 256522942 ([MCC] YouTube TV - Web - Trial Start)" ~ "YTMP", # Step 2
      pa == "YouTube Premium" ~ "YTMP", # Step 2
      conversion == "Type 256522942 ([MCC] YouTube TV - Web - Trial Start)" ~ "YouTube TV", # Step 2
      pa == "Cloud" & conversion != "Type 14257803 (Enterprise - Apps - Signup Confirm - Unique)" ~ "Cloud Workspace", # Step 3
      pa == "Cloud" & conversion == "Type 14257803 (Enterprise - Apps - Signup Confirm - Unique)" ~ "Cloud GCP", # Step 3
      pa == "Project Fi" ~ "Google Fi", # Step 4
      pa == "Google Chrome" ~ "Chrome",
      TRUE ~ pa
    )
  ) %>%
  mutate(
    parsed_type = parse_number(conversion),
    grouped_conversion = case_when(
      conversion %in% c("Chromebook Microsite Referral Clicks Q4 2015", "Type 251422729 (Chromebooks Microsite Referral Clicks (Q4 2017))") ~ "Chromebook Referrals",
      conversion %in% c("Desktop Downloads", "Type 11541547 (Desktop Download)") ~
        "Desktop Downloads",
      pa == "Pixel" ~ "Mobile Conversions",
      pa == "DSM" ~ "Non-Mobile Device Conversions",
      conversion == "Type 302982954 (Lena - P Lead)" ~ "Lena P Lead",
      conversion == "Type 288347008 (LENA - B Lead)" ~ "Lena B Lead",
      conversion == "Type 288697653 (LENA - Q Lead)" ~ "Lena Q Lead",
      parsed_type %in% c(181283993, 855508686) ~ "Workspace Free Trial Start",
      parsed_type == 330755641 ~ "Microsite Conversions",
      parsed_type == 14257803 ~ "Enterprise Signups",
      parsed_type == 289680712 ~ "Google(iOs) First Open",
      parsed_type == 256522942 ~ "YouTube TV - Web - Trial Start",
      parsed_type %in% c(452391534, 221497833, 277150074) ~ "Trial Signups Complete",
      TRUE ~ conversion
    ),
    pa = case_when(
      conversion == "Type 288697653 (LENA - Q Lead)" ~ "SMB-QLead",
      TRUE ~ pa
    )
  ) %>%
  filter(absolute_lift > 0)


# all.equal(Final_CLS_2022_Study_List_Non_Search_model_file,Final_CLS_2022_Study_List_Non_Search_v3)
```

# Create All Response Curves only normal powers

## Folder for all Output and scripts

```{r}
folder_name <- paste0("Output/", "outputfiles_", Sys.Date(), "_", "Run1", "/")
dir.create(folder_name) # it will throw a warning if folder exists

# file.sources2 <- list.files(path = "Output/outputfiles_2022-10-14_Run1//", pattern =".html|.png", full.names = TRUE)
file.sources <- list.files(path = "RScripts/", pattern = "*.R", full.names = TRUE)
sapply(file.sources, source, .GlobalEnv)
```

## Check parameters

```{r}

### powers to try
powers <- seq(0.1, 0.9, by = 0.01)
powers2 <- 1

### Powers to Try
#powers <- seq(0.1, 0.9, by = 0.01)
#powers2 <-seq(1.5,3, by = 0.25)


### Lambda parameters
parameters <- c(
  #  seq(0.1, 2, by =0.1) ,  seq(2, 5, 0.5) ,
  seq(5, 29, 1)
  ,seq(30, 102, 4)
  ,seq(110, 1000, 15)
  ,seq(1000, 10020, 500)
)

### elasticnet parameters
alpha_parameters <- c(seq(0, 1, 0.25))

# For Testing Purposes
#alpha_parameters <- c(seq(1, 1, 1))

```

## Testing Different Model Types

### Chrome

#### Data Readin

```{r}

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "Chrome") %>%
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre %>%
  select(
    region_v2, country, channel, tactic,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_chrome <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1153)

iso_chrome$fit(df_test)

scores_train <- df_test %>%
  iso_chrome$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>% 
  filter(average_depth > 3)


Final_CLS_2022_Study_List_Non_Search_model_file_chrome <-
  Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre2 %>%
  named_group_split(tactic)
```

#### Run Model

```{r, warning = false}


fits_non_search_chrome <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_chrome,poly_ind = 0)

best_ind_non_search_chrome <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_chrome), best_ind_function,df = fits_non_search_chrome,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_chrome) 

coef_non_search_chrome <- best_ind_non_search_chrome %>% bind_rows #make a matrix of all coefs

best_fit_non_search_chrome <- best_ind_non_search_chrome %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_chrome_pre, tactic))  
```

#### Create Graph Object

```{r}
graph_list_chrome <- lapply(1:length(best_fit_non_search_chrome), graphing_function4, df1 = best_fit_non_search_chrome, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_chrome)
```

```{r}
end_time <- Sys.time()

time_chrome = end_time - start_time

time_chrome
```

### Cloud

#### Data Readin

```{r}
start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa %in% c("Cloud GCP", "Cloud Workspace")) %>%
  mutate(
    pa = "Cloud",
    pa2 = "Cloud - All Channel"
  ) %>%
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift, parsed_type
  )

iso_cloud <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1153)

iso_cloud$fit(df_test)

scores_train <- df_test %>%
  iso_cloud$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre %>%
  left_join(scores_train, by = c("id2" = "id"))

Final_CLS_2022_Study_List_Non_Search_model_file_cloud <-
  Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre2 %>%
  named_group_split(pa2)

```

#### Run Model

```{r, warning = false}
fits_non_search_cloud <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_cloud,poly_ind = 0)

best_ind_non_search_cloud <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_cloud), best_ind_function,df = fits_non_search_cloud,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_cloud) 

coef_non_search_cloud <- best_ind_non_search_cloud %>% bind_rows #make a matrix of all coefs

best_fit_non_search_cloud <- best_ind_non_search_cloud %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_cloud_pre, pa2))  
```

#### Create Graph Object

```{r}
graph_list_cloud <- lapply(1:length(best_fit_non_search_cloud), graphing_function4, df1 = best_fit_non_search_cloud, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_cloud)
```


```{r}
end_time <- Sys.time()

time_cloud = end_time - start_time
```

### YouTube

#### Data Readin

```{r}

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa %in% c("YouTube TV", "YTMP")) %>%
  mutate(
    pa = "YouTube",
    pa2 = "YouTube"
  ) %>%
  #  filter(absolute_lift < 5000) %>%
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift, parsed_type
  )

iso_yt <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1153)

iso_yt$fit(df_test)

scores_train <- df_test %>%
  iso_yt$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 3.89)

Final_CLS_2022_Study_List_Non_Search_model_file_youtube <-
  Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre2 %>%
  named_group_split(region_v2)
```

#### Run Model

```{r, warning = false}
fits_non_search_youtube <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_youtube,poly_ind = 0)

best_ind_non_search_youtube <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_youtube), best_ind_function,df = fits_non_search_youtube,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_youtube) 

coef_non_search_youtube <- best_ind_non_search_youtube %>% bind_rows #make a matrix of all coefs

best_fit_non_search_youtube <- best_ind_non_search_youtube %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_youtube_pre, pa2))  
```

#### Create Graph Object

```{r}
graph_list_youtube <- lapply(1:length(best_fit_non_search_youtube), graphing_function4, df1 = best_fit_non_search_youtube, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_youtube)
```


```{r}
end_time <- Sys.time()

time_youtube = end_time - start_time
```


### DSM

#### Data Readin

```{r}

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "DSM") %>%
  filter(region_v2 != "APAC") %>%
  # filter(absolute_lift < 1000) # %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_dsm <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_dsm$fit(df_test)

scores_train <- df_test %>%
  iso_dsm$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 5)

Final_CLS_2022_Study_List_Non_Search_model_file_dsm <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre2 %>%
  named_group_split(region_v2, channel)
```

#### Run Model

```{r, warning = false}
fits_non_search_dsm <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_dsm,poly_ind = 0)

best_ind_non_search_dsm <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_dsm), best_ind_function,df = fits_non_search_dsm,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_dsm)

coef_non_search_dsm <- best_ind_non_search_dsm %>% bind_rows #make a matrix of all coefs

best_fit_non_search_dsm <- best_ind_non_search_dsm %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre, region_v2,channel))  
```

#### Create Graph Object

```{r}
graph_list_dsm <- lapply(1:length(best_fit_non_search_dsm), graphing_function4, df1 = best_fit_non_search_dsm, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_dsm)
```


```{r}
end_time <- Sys.time()

time_dsm = end_time - start_time
```


### Pixel

#### Data Readin

```{r}

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "Pixel") %>%
  mutate(
    pa2 = "Pixel - All Channel"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_pixel <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_pixel$fit(df_test)

scores_train <- df_test %>%
  iso_pixel$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 3.1)

Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre2


Final_CLS_2022_Study_List_Non_Search_model_file_pixel <-
  Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre2 %>%
  named_group_split(pa2)
```

#### Run Model

```{r, warning = false}
fits_non_search_pixel <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_pixel,poly_ind = 0)

best_ind_non_search_pixel <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_pixel), best_ind_function,df = fits_non_search_pixel,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_pixel) 

coef_non_search_pixel <- best_ind_non_search_pixel %>% bind_rows #make a matrix of all coefs

best_fit_non_search_pixel <- best_ind_non_search_pixel %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_pixel_pre, pa2))  
```

#### Create Graph Object

```{r}
graph_list_pixel <- lapply(1:length(best_fit_non_search_pixel), graphing_function4, df1 = best_fit_non_search_pixel, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_pixel)
```


```{r}
end_time <- Sys.time()

time_pixel = end_time - start_time
```


### Fi

#### Data Readin

```{r}

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "Google Fi") %>%
  mutate(
    pa2 = "Fi - All Channel"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_fi <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_fi$fit(df_test)

scores_train <- df_test %>%
  iso_fi$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
  filter(average_depth > 4.75)

Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre2


Final_CLS_2022_Study_List_Non_Search_model_file_fi <-
  Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre2 %>%
  named_group_split(channel)
```

#### Run Model

```{r, warning = false}
fits_non_search_fi <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_fi,poly_ind = 0)

best_ind_non_search_fi <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_fi), best_ind_function,df = fits_non_search_fi,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_fi) 

coef_non_search_fi <- best_ind_non_search_fi %>% bind_rows #make a matrix of all coefs

best_fit_non_search_fi <- best_ind_non_search_fi %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_fi_pre, pa2))  
```

#### Create Graph Object

```{r}
graph_list_fi <- lapply(1:length(best_fit_non_search_fi), graphing_function4, df1 = best_fit_non_search_fi, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_fi)
```


```{r}
end_time <- Sys.time()

time_fi = end_time - start_time
```


### SMB - QLeads

#### Data Readin

```{r}

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(grouped_conversion == 'Lena Q Lead') %>%
  mutate(
    pa2 = "SMB - Q-Lead"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_smbq <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_smbq$fit(df_test)

scores_train <- df_test %>%
  iso_smbq$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>% 
  filter(average_depth > 1)

Final_CLS_2022_Study_List_Non_Search_model_file_smbq <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre2 %>%
  named_group_split(pa2)
```

#### Run Model

```{r, warning = false}
fits_non_search_smbq <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_smbq,poly_ind = 0)

best_ind_non_search_smbq <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_smbq), best_ind_function,df = fits_non_search_smbq,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbq) 

coef_non_search_smbq <- best_ind_non_search_smbq %>% bind_rows #make a matrix of all coefs

best_fit_non_search_smbq <- best_ind_non_search_smbq %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_smbq_pre, pa2))  
```

#### Create Graph Object

```{r}
graph_list_smbq <- lapply(1:length(best_fit_non_search_smbq), graphing_function4, df1 = best_fit_non_search_smbq, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbq)
```


```{r}
end_time <- Sys.time()

time_smbq = end_time - start_time
```


### SMB - BLeads

#### Data Readin

```{r}

start_time <- Sys.time()

Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "SMB" & grouped_conversion == 'Lena B Lead') %>%
  mutate(
    pa2 = "SMB - B-Lead"
  ) %>%
  #   filter(absolute_lift < 1000)  %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_smbb <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_smbb$fit(df_test)

scores_train <- df_test %>%
  iso_smbb$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre2 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>% 
  filter(average_depth > 4)

Final_CLS_2022_Study_List_Non_Search_model_file_smbb <-
  Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre2 %>%
  named_group_split(channel)
```

#### Run Model

```{r, warning = false}
fits_non_search_smbb <- model_wrapper_function(df = Final_CLS_2022_Study_List_Non_Search_model_file_smbb,poly_ind = 0)

best_ind_non_search_smbb <- 
  lapply(1:length(Final_CLS_2022_Study_List_Non_Search_model_file_smbb), best_ind_function,df = fits_non_search_smbb,
         df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbb) 

coef_non_search_smbb <- best_ind_non_search_smbb %>% bind_rows #make a matrix of all coefs

best_fit_non_search_smbb <- best_ind_non_search_smbb %>%
  set_names(names_function(Final_CLS_2022_Study_List_Non_Search_model_file_smbb_pre, channel))  
```

#### Create Graph Object

```{r}
graph_list_smbb <- lapply(1:length(best_fit_non_search_smbb), graphing_function4, df1 = best_fit_non_search_smbb, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_smbb)
```


```{r}
end_time <- Sys.time()

time_smbb = end_time - start_time
```

## Export all graph lists

```{r}
graph_names <- mget(ls(pat = 'graph_list_'))
   
df_names <- mget(setdiff(ls(pattern = 'Final_CLS_2022_Study_List_Non_Search_model_file_'), ls(pattern = "pre")))

#rm(Final_CLS_2022_Study_List_Non_Search_model_file_Chrome,Final_CLS_2022_Study_List_Non_Search_model_file_Cloud,Final_CLS_2022_Study_List_Non_Search_model_file_YouTube)

#lapply(1:length(graph_names),
#      function(j) {
#lapply(1:length(df_names[[j]]),export_rplots_function2,starting_name = "Non_Search_",folder_name = folder_name,df_list = #df_names[[j]],graphing_list = graph_names[j][[1]])
#      }
#       )
```

## Grid of all Response Curves

[*Sub Plot Documentation*](https://plotly.com/r/subplots/)

```{r, fig.height= 15, echo=FALSE,message=FALSE, warning = FALSE}

lapply(1:length(graph_names),
function(i){
  subplot(graph_names[i][[1]], nrows = length(graph_names[i][[1]]))
}
)


lapply(1:length(graph_names),
function(i){
#p1 = graph_names[i][[1]]
do.call(grid.arrange,graph_names[i][[1]])
#return(grid.arrange(grobs = p1))
}
)

```

## Coef Matrix

```{r}
coef.2_matrix <- mget((ls(pat = 'coef_'))) %>%  bind_rows()

coef.2_matrix


```

## Graphs with Anomaly Scores

```{r}
graph_list.fi <- lapply(1:length(best_fit_non_search_fi), graphing_function4_w_anom, df1 = best_fit_non_search_fi, df2 = Final_CLS_2022_Study_List_Non_Search_model_file_fi)

### Add GG Text Repel
ggplotly(graph_list.fi[[3]])

```

# Create all Response Curves - Ridge/Lasso

```{r, warning = FALSE}

start_time <- Sys.time()

fits.non.search.RIDGE_LASSO <- lapply(
  1:length(df_names),
  function(i) {
    model_wrapper_function(df = df_names[i][[1]],poly_ind = 0)
  }
)

end_time <- Sys.time()

combined_ridge_time <- start_time - end_time

best.ind.non.search.RIDGE_LASSO <- lapply(
  1:length(df_names),
  function(i) {   
  lapply(1:length(df_names[i][[1]]), best_ind_function,df = fits.non.search.RIDGE_LASSO[i][[1]],
         df2 = df_names[i][[1]])
  }
)

coef.non.search.RIDGE_LASSO <- lapply(
  1:length(df_names),
  function (i){
  best.ind.non.search.RIDGE_LASSO[i][[1]] %>% bind_rows
  }
) %>%
  bind_rows() %>% 
  as.data.frame() %>% 
#  mutate(
#    cost_p2 = 0,
#    lambda = 0,
#    alpha = 0,
#    powers2 = 0
#  ) %>% 
  select(one_of(colnames(coef.2_matrix)))


best.fit.non.search.RIDGE_LASSO <- lapply(1:length(df_names),
      function(j) {
lapply(1:length(best.ind.non.search.RIDGE_LASSO[[j]]),
      function(i){
        best.ind.non.search.RIDGE_LASSO[j][[1]][i] %>% 
        set_names(nm = best.ind.non.search.RIDGE_LASSO[j][[1]][[i]]["model"])
      } 
)
      }
       )
  


```


# Create all Response Curves - RLM

```{r, warning = FALSE}


start_time <- Sys.time()

fits.non.search.RLM <- lapply(
  1:length(df_names),
  function(i) {
    model_wrapper_function2(df = df_names[i][[1]])
  }
)

end_time <- Sys.time()

combined_rlm_time <- start_time - end_time

best.ind.non.search.RLM <- lapply(
  1:length(df_names),
  function(i) {   
  lapply(1:length(df_names[i][[1]]), best_ind_function,df = fits.non.search.RLM[i][[1]],
         df2 = df_names[i][[1]])
  }
)

coef.non.search.RLM <- lapply(
  1:length(df_names),
  function (i){
  best.ind.non.search.RLM[i][[1]] %>% bind_rows
  }
) %>%
  bind_rows() %>% 
  as.data.frame() %>% 
  mutate(
    cost_p2 = 0,
    lambda = 0,
    alpha = 0,
    powers2 = 0
  ) %>% 
  select(one_of(colnames(coef.2_matrix)))


best.fit.non.search.RLM <- lapply(1:length(df_names),
      function(j) {
lapply(1:length(best.ind.non.search.RLM[[j]]),
      function(i){
        best.ind.non.search.RLM[j][[1]][i] %>% 
        set_names(nm = best.ind.non.search.RLM[j][[1]][[i]]["model"])
      } 
)
      }
       )


  
```


```{r}

-combined_ridge_time+combined_rlm_time

```


```{r, warning = FALSE}

graph.list.rlm <- lapply(1:length(df_names),
      function(i){
      lapply(1:length(best.fit.non.search.RLM[i][[1]]), graphing_function_rlm, df1= best.fit.non.search.RLM[i],df2 = df_names[i])
      } 
)


graph.list.RIDGE_LASSO <- lapply(1:length(df_names),
      function(i){
      lapply(1:length(best.fit.non.search.RIDGE_LASSO[i][[1]]), graphing_function_elasticnet, df1= best.fit.non.search.RIDGE_LASSO[i],df2 = df_names[i])
      } 
)




```


### Export all Plots

```{r}

folder_name1 <- paste0("Output/", "outputfiles_", Sys.Date(), "_", "RLM", "/")
dir.create(folder_name1) # it will throw a warning if folder exists

lapply(1:length(df_names),
      function(j) {
lapply(1:length(df_names[[j]]),export_rplots_function2,starting_name = "Non_Search_",folder_name = folder_name1,df_list = df_names[[j]],graphing_list = graph.list.rlm[j][[1]])
      }
       )

folder_name2 <- paste0("Output/", "outputfiles_", Sys.Date(), "_", "ElasticNet", "/")
dir.create(folder_name2) # it will throw a warning if folder exists


lapply(1:length(df_names),
      function(j) {
lapply(1:length(df_names[[j]]),export_rplots_function2,starting_name = "Non_Search_",folder_name = folder_name2,df_list = df_names[[j]],graphing_list = graph.list.RIDGE_LASSO[j][[1]])
      }
       )

```

### Show Graphs

```{r}
lapply(1:length(df_names),
function(j){
  subplot(graph.list.rlm[j][[1]], nrows = length(graph.list.rlm[j][[1]]))
}
)


lapply(1:length(df_names),
function(i){
#p1 = graph_names[i][[1]]
do.call(grid.arrange,graph.list.rlm[i][[1]])
#return(grid.arrange(grobs = p1))
}
)



```

```{r}
lapply(1:length(df_names),
function(j){
  subplot(graph.list.RIDGE_LASSO[j][[1]], nrows = length(graph.list.RIDGE_LASSO[j][[1]]))
}
)


lapply(1:length(df_names),
function(i){
#p1 = graph_names[i][[1]]
do.call(grid.arrange,graph.list.RIDGE_LASSO[i][[1]])
#return(grid.arrange(grobs = p1))
}
)

```


# Testing Metafor Package

```{r}

p_load(lme4)
p_load(metaforest)

```

## Testing on DSM Data

### Load in Data
```{r}


Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre <-
  Final_CLS_2022_Study_List_Non_Search_model_file %>%
  filter(pa == "DSM") %>%
  filter(region_v2 != "APAC") %>%
  # filter(absolute_lift < 1000) # %>%
  # filter(study_id != '6297420') #%>%
  #  filter(study_id !='149161711') %>%
  #  filter(study_id != '148613002') %>%
  # filter(study_id !='3284625') %>%
  #  filter(study_id !='3329131')
  mutate(
    id2 = row_number()
  )

df_test <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  # select(-study_id, -id2, -region, -scaling_factor, -quarter, -pa, -study_name)
  select(
    region_v2, country, channel, tactic,
    # treatment_user_count:control,
    cost_spent_on_exposed_group:absolute_lift
  )

iso_dsm <- isolationForest$new(sample_size = nrow(df_test), num_trees = 10000, seed = 1152)

iso_dsm$fit(df_test)

scores_train <- df_test %>%
  iso_dsm$predict() %>%
  arrange(desc(anomaly_score))

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_pre %>%
  left_join(scores_train, by = c("id2" = "id")) %>%
 filter(average_depth > 5.1)


Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4 <-
  Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1 %>%
  named_group_split(region_v2)

```

### Standardize Data

```{r}
Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1 %>% 
  mutate(
    
  )
```


### Run a Fixed-Effects Model
[Documentation](https://cjvanlissa.github.io/Doing-Meta-Analysis-in-R/fixedef.html)
```{r, fig.height= 15}


#df_smd 


i = 1

yi_DSM = Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['absolute_lift'] %>% unlist()
vi_DSM = Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['Significant_Spend']^1 %>% unlist()
split2 = factor(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['channel'] %>% unlist(),labels = unique(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['channel']) %>% unlist())


m_reg <- rma(yi = yi_DSM,     # The d-column of the df, which contains Cohen's d
         vi = vi_DSM   # The vi-column of the df, which contains the variances
         ,mods = ~split2-1 #to remove intercept between slopes
         )  
       

mod1_test

#rm(Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4_V1)#,mod1_test, i)

predict(mod1_test)


forest(mod1_test, slab = Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]]['study_name'] %>% unlist(), addcred = TRUE)
```
### Additional Test

```{r}

# Specify basic plot, mapping sex to the x-axis, effect size 'd' to the y-axis,
# and 'weights' to the weight parameter.

Final_CLS_2022_Study_List_Non_Search_model_file_dsm_Meta4[[i]] %>% 
  ggplot()+
  aes(
    x = cost_spent_on_exposed_group,
    y = absolute_lift,
    size = 1/sqrt(cost_spent_on_exposed_group)
  ) +
  geom_point(shape = 1) + # Add scatter
  geom_abline(intercept = m_reg$b[1], slope = m_reg$b[3]) + # Add regression line
  theme_bw() + # Apply black and white theme
  theme(legend.position = "none") # Remove legend

```

```{r}

```

